IntervalStatistic is a library for computing interval estimates of a mean and a variance.
There are several algorithms for testing whether a sample follows a given distribution:
chi-square goodness-of-fit test
Load Libraries
In [2]:
using IntervalStatistic
using Distributions
using ValidatedNumerics
using Plots
# Select the PyPlot backend for Plots. `reuse=true` is forwarded to the backend
# (presumably so successive plots reuse one figure window — confirm in the Plots docs).
pyplot(reuse=true)
# Seed the global random number generator so the samples drawn below are repeatable.
srand(10)
Out[2]:
In [3]:
"""
    show_result(value_check_label)

Run one distribution check, print its outcome, and overlay the histogram built by
that check on the current plot.

`value_check_label` is a 3-tuple `(values, check, label)`:
the sample, a check object accepted by `IntervalStatistic.isDistribution` and
`IntervalStatistic.Check.histogram`, and the text used both as the print prefix and
as the plot legend entry.

Prints two lines (`"<label>: <verdict>"` and `"<label> bin count: <n>"`) and returns
whatever `plot!` returns, so the call can be the last expression of a notebook cell.
"""
function show_result(value_check_label)
    samples, check, label = value_check_label

    verdict = IntervalStatistic.isDistribution(samples, check)
    println(label, ": ", verdict)

    # Each histogram element is indexed as [1] -> bin interval, [2] -> bin weight.
    hist = IntervalStatistic.Check.histogram(samples, check)
    intervals = [bin[1] for bin in hist]
    println(label, " bin count: ", size(intervals, 1))

    # Concrete Float64 vectors are handed to plot! directly instead of wrapping
    # abstractly-typed arrays in closures that re-index them by rounded position.
    midpoints = Float64[mid(iv) for iv in intervals]
    weights = Float64[bin[2] for bin in hist]
    widths = Float64[diam(iv) for iv in intervals]
    total_weight = sum(weights)

    # Weight per unit width, scaled by the total weight, so the curve is comparable
    # with a probability density drawn on the same axes.
    densities = weights ./ widths ./ total_weight

    return plot!(midpoints, densities, label=label)
end
Out[3]:
Generate samples from the standard normal distribution
In [4]:
# Draw a sample from the standard normal distribution and record the distribution
# parameters and the sample mean.
d = Normal()
# Called `sample_size` rather than `length` so that Base.length is not shadowed
# for the rest of the session.
sample_size = 500
confidence_probability = 0.95
# NOTE: `values` also hides Base.values; the name is kept because the check cell
# that follows reads the sample through it.
values = rand(d, sample_size)
mu, sigma = params(d)
# Sample mean, kept as the final expression of the cell.
average = sum(values) / sample_size
Out[4]:
In [5]:
# Build one (sample, check, label) tuple per binning rule, draw the reference pdf,
# then overlay the histogram produced by every check.

# First constructor argument shared by all checks (presumably the significance
# level, i.e. 1 - confidence_probability — confirm against IntervalStatistic).
level = 0.05
# Distribution the sample is tested against; built once and shared by the checks.
reference = Normal(mu, sigma)

result_by_sturges_chi_square = (
    values,
    IntervalStatistic.Check.SturgesChiSquareCheck(level, reference),
    "Chi-square with Sturges k formula"
)
result_by_scott_chi_square = (
    values,
    IntervalStatistic.Check.ScottChiSquareCheck(level, reference),
    "Chi-square with Scott h formula"
)
result_by_taylor_chi_square = (
    values,
    IntervalStatistic.Check.TaylorChiSquareCheck(level, reference),
    "Chi-square with Taylor h formula"
)
result_by_freedmandiaconis_chi_square = (
    values,
    IntervalStatistic.Check.FreedmanDiaconisChiSquareCheck(level, reference),
    "Chi-square with Freedman-Diaconis h formula"
)
result_by_doane_chi_square = (
    values,
    IntervalStatistic.Check.DoaneChiSquareCheck(level, reference),
    "Chi-square with Doane k formula"
)
result_by_wichard_chi_square = (
    values,
    IntervalStatistic.Check.WichardChiSquareCheck(level, reference),
    "Chi-square with Wichard k formula"
)
result_by_large_n_chi_square = (
    values,
    IntervalStatistic.Check.LargeNChiSquareCheck(level, reference),
    "Chi-square with k formula for large n"
)
# The normal-specific check takes the bin count and the parameters explicitly.
result_by_modified_chi_square_for_normal_dist_10_bins = (
    values,
    IntervalStatistic.Check.ChiSquareNormalCheck(level, 10, mu, sigma),
    "Modified chi-square for normal dist with 10 bins"
)
result_by_modified_chi_square_for_normal_dist_11_bins = (
    values,
    IntervalStatistic.Check.ChiSquareNormalCheck(level, 11, mu, sigma),
    "Modified chi-square for normal dist with 11 bins"
)

# Reference density over mu ± 3 sigma; every show_result call adds to this plot.
plot((mu - 3*sigma):(sigma*0.01):(mu + 3*sigma), (x) -> pdf(d, x), label="pdf")
show_result(result_by_sturges_chi_square)
show_result(result_by_large_n_chi_square)
show_result(result_by_wichard_chi_square)
show_result(result_by_doane_chi_square)
show_result(result_by_freedmandiaconis_chi_square)
show_result(result_by_scott_chi_square)
show_result(result_by_taylor_chi_square)
show_result(result_by_modified_chi_square_for_normal_dist_10_bins)
# Kept as the final expression so the cell's value is the result of this call.
show_result(result_by_modified_chi_square_for_normal_dist_11_bins)
Out[5]:
Generate samples from a normal distribution with mu=100, sigma=4
In [6]:
# Draw a sample from Normal(100, 4) and record the distribution parameters and the
# sample mean.
d = Normal(100, 4)
# Called `sample_size` rather than `length` so that Base.length is not shadowed
# for the rest of the session.
sample_size = 500
confidence_probability = 0.95
# NOTE: `values` also hides Base.values; the name is kept because the check cell
# that follows reads the sample through it.
values = rand(d, sample_size)
mu, sigma = params(d)
# Sample mean, kept as the final expression of the cell.
average = sum(values) / sample_size
Out[6]:
In [7]:
# Build one (sample, check, label) tuple per binning rule, draw the reference pdf,
# then overlay the histogram produced by every check.

# First constructor argument shared by all checks (presumably the significance
# level, i.e. 1 - confidence_probability — confirm against IntervalStatistic).
level = 0.05
# Distribution the sample is tested against; built once and shared by the checks.
reference = Normal(mu, sigma)

result_by_sturges_chi_square = (
    values,
    IntervalStatistic.Check.SturgesChiSquareCheck(level, reference),
    "Chi-square with Sturges k formula"
)
result_by_scott_chi_square = (
    values,
    IntervalStatistic.Check.ScottChiSquareCheck(level, reference),
    "Chi-square with Scott h formula"
)
result_by_taylor_chi_square = (
    values,
    IntervalStatistic.Check.TaylorChiSquareCheck(level, reference),
    "Chi-square with Taylor h formula"
)
result_by_freedmandiaconis_chi_square = (
    values,
    IntervalStatistic.Check.FreedmanDiaconisChiSquareCheck(level, reference),
    "Chi-square with Freedman-Diaconis h formula"
)
result_by_doane_chi_square = (
    values,
    IntervalStatistic.Check.DoaneChiSquareCheck(level, reference),
    "Chi-square with Doane k formula"
)
result_by_wichard_chi_square = (
    values,
    IntervalStatistic.Check.WichardChiSquareCheck(level, reference),
    "Chi-square with Wichard k formula"
)
result_by_large_n_chi_square = (
    values,
    IntervalStatistic.Check.LargeNChiSquareCheck(level, reference),
    "Chi-square with k formula for large n"
)
# The normal-specific check takes the bin count and the parameters explicitly.
result_by_modified_chi_square_for_normal_dist_10_bins = (
    values,
    IntervalStatistic.Check.ChiSquareNormalCheck(level, 10, mu, sigma),
    "Modified chi-square for normal dist with 10 bins"
)
result_by_modified_chi_square_for_normal_dist_11_bins = (
    values,
    IntervalStatistic.Check.ChiSquareNormalCheck(level, 11, mu, sigma),
    "Modified chi-square for normal dist with 11 bins"
)

# Reference density over mu ± 3 sigma; every show_result call adds to this plot.
plot((mu - 3*sigma):(sigma*0.01):(mu + 3*sigma), (x) -> pdf(d, x), label="pdf")
show_result(result_by_sturges_chi_square)
show_result(result_by_large_n_chi_square)
show_result(result_by_wichard_chi_square)
show_result(result_by_doane_chi_square)
show_result(result_by_freedmandiaconis_chi_square)
show_result(result_by_scott_chi_square)
show_result(result_by_taylor_chi_square)
show_result(result_by_modified_chi_square_for_normal_dist_10_bins)
# Kept as the final expression so the cell's value is the result of this call.
show_result(result_by_modified_chi_square_for_normal_dist_11_bins)
Out[7]: